# Files and directory paths.
# csvLocation is relative to the project root selected below.
csvLocation <- "dataset_working/feature_extraction/feature_extract_srinath.csv"

# Project root on the original author's machine; change path accordingly.
projectDir <- "D:/HP_Win10_OneDrive/Study/OVGU/University/Summer-2021/DSR/Github/DataScienceR"

# Only switch directories when the configured root actually exists. On any
# other machine setwd() would abort the whole script; keeping the current
# working directory lets the script run from a checkout located elsewhere.
if (dir.exists(projectDir)) {
  setwd(projectDir)
} else {
  message(
    "Project directory not found: ", projectDir,
    "; resolving paths against ", getwd()
  )
}

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.1     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v readr   1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(ggplot2)
library(syuzhet)
#install.packages("plotly")
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#install.packages("treemapify")
library(treemapify)

# Load the extracted per-tweet features from csvLocation and print a compact
# overview of the resulting columns.
features_Data <- utils::read.csv(csvLocation)
dplyr::glimpse(features_Data)
## Rows: 29,800
## Columns: 18
## $ Author_Id    <chr> "06ct0t68y1acizh9eow3g5rhancrppr8", "06ct0t68y1acizh9eow3~
## $ Tweet_Id     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17~
## $ Tweet_Text   <chr> "courteney cox recreates classic friends scene real life ~
## $ Target       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Syuzhet      <dbl> 0.50, 0.80, 3.15, 0.40, 1.15, 0.75, 0.00, 0.50, 0.20, 0.0~
## $ anger        <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ~
## $ anticipation <int> 0, 0, 0, 0, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 4, 1, 0, 0, ~
## $ disgust      <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ~
## $ fear         <int> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, ~
## $ joy          <int> 0, 0, 2, 0, 2, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 3, 0, 1, 0, ~
## $ sadness      <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, ~
## $ surprise     <int> 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, ~
## $ trust        <int> 1, 1, 1, 0, 2, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 0, ~
## $ positive     <int> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, ~
## $ negative     <int> 2, 0, 2, 1, 4, 1, 0, 0, 1, 0, 1, 1, 1, 0, 3, 3, 1, 1, 2, ~
## $ Bing         <int> 0, 0, 3, 0, -1, 1, 0, 1, -1, 0, 0, 1, 1, -1, 2, 1, -2, 1,~
## $ Afinn        <int> 0, 0, 8, 0, 0, 3, 0, 3, 2, 3, 0, 3, 3, 2, 6, 0, -2, 2, 0,~
## $ Nrc          <int> 2, 0, 2, 1, 3, 0, 0, 0, 1, 0, 1, 1, 1, -1, 3, 3, 0, 0, 2,~
#Start of sentiments visualizations
# Each plotted point is the average score over one run of `chunkSize`
# consecutive rows of features_Data.
chunkSize <- 100

sentiments_df <- features_Data %>%
  select(Syuzhet, Bing, Afinn, Nrc)

# Only complete chunks are averaged; a trailing partial chunk is left out.
completeChunks <- nrow(sentiments_df) %/% chunkSize

# Grouped summary instead of a row-by-row loop: no per-cell data frame
# indexing and no growing of the result one row at a time.
sentiment_types_df <- sentiments_df %>%
  slice(seq_len(completeChunks * chunkSize)) %>%
  mutate(chunk = rep(seq_len(completeChunks), each = chunkSize)) %>%
  group_by(chunk) %>%
  summarise(
    syuzhet = sum(Syuzhet) / chunkSize,
    bing = sum(Bing) / chunkSize,
    afinn = sum(Afinn) / chunkSize,
    nrc = sum(Nrc) / chunkSize
  ) %>%
  select(-chunk) %>%
  as.data.frame()

# "jitter" is not a scatter mode (valid flags are lines/markers/text), so the
# first trace now uses "lines" explicitly, matching the other three traces.
plot_ly(sentiment_types_df, y = ~syuzhet, type = "scatter", mode = "lines", name = "syuzhet") %>%
  add_trace(y = ~bing, mode = "lines", name = "bing") %>%
  add_trace(y = ~afinn, mode = "lines", name = "afinn") %>%
  add_trace(y = ~nrc, mode = "lines", name = "nrc") %>%
  layout(title = "Different type of sentiments for Author Tweets",
         yaxis = list(title = "Score"), xaxis = list(title = "Number of tweets"))
#End of sentiments visualizations

#Start of Emotions Visualizations
# Total count per emotion column across all rows of features_Data, expressed
# as a percentage of the sum of all emotion counts.
emotions_df <- features_Data %>%
  select(anger, anticipation, disgust, fear, joy, sadness, surprise, trust)

emotions_df_column_count <- colSums(emotions_df)
emotions_df_column_count_df <- data.frame(
  count = emotions_df_column_count,
  Different_Emotions = names(emotions_df_column_count)
)

emotions_df_column_count_df <- emotions_df_column_count_df %>%
  mutate(percent = count / sum(count) * 100)

##Start of Pie chart
# A single stacked bar wrapped into polar coordinates gives the pie; each
# slice is labelled in place, so the legend is switched off.
ggplot(emotions_df_column_count_df,
       aes(x = "",
           y = percent,
           fill = Different_Emotions)) +
  geom_bar(width = 1,
           stat = "identity",
           color = "black") +
  geom_text(aes(label = paste0(Different_Emotions, "\n", round(percent, 2))),
            position = position_stack(vjust = 0.5),
            color = "black") +
  coord_polar("y",
              start = 0,
              direction = -1) +
  theme_void() +
  # "none" is the documented way to hide the legend; the string "FALSE" is not
  # a valid legend.position and is rejected by newer ggplot2 releases.
  theme(legend.position = "none") +
  labs(title = "Tweets Emotion Analysis")

##End of Pie chart
#End of Emotions Visualizations

#Start of Positive/Negative Visualizations
# Total of the `positive` and `negative` columns across all rows of
# features_Data, each expressed as a percentage of their combined sum.
positive_df <- features_Data %>%
  select(positive, negative)

positive_df_column_count <- colSums(positive_df)
positive_df_column_count_df <- data.frame(
  count = positive_df_column_count,
  Sentiment = names(positive_df_column_count)
)

positive_df_column_count_df <- positive_df_column_count_df %>%
  mutate(percent = count / sum(count) * 100)

# ggplot() + geom_col() draws the same bars as the former qplot() call, which
# is deprecated in current ggplot2.
ggplot(positive_df_column_count_df,
       aes(x = Sentiment, y = percent, fill = Sentiment)) +
  geom_col() +
  labs(x = "Sentiment", y = "Percentage") +
  ggtitle("Tweets Sentiment Analysis")

#End of Positive/Negative Visualizations

#Start of Tweet Analysis Based On Author ID
authorId <- 99

# Plot the emotion distribution of a single author's tweets.
#
# The author's rows are picked by position from the global features_Data:
# author k owns rows ((k - 1) * tweets_per_author + 1) to
# (k * tweets_per_author). This assumes rows are ordered by author with a
# fixed number of tweets each -- TODO confirm against the extraction step.
#
# Args:
#   author_Id: 1-based position of the author (a single whole number >= 1).
#   tweets_per_author: number of consecutive rows belonging to each author.
#
# Returns:
#   A ggplot bar chart with the percentage share of each emotion.
getAuthorBasedTweets <- function(author_Id, tweets_per_author = 100) {
  stopifnot(
    is.numeric(author_Id), length(author_Id) == 1,
    author_Id >= 1, author_Id == round(author_Id),
    is.numeric(tweets_per_author), length(tweets_per_author) == 1,
    tweets_per_author >= 1
  )

  # The former start of (author_Id * 100) - 100 selected 101 rows and pulled
  # in the last tweet of the previous author; the range now starts one later.
  first_row <- (author_Id - 1) * tweets_per_author + 1
  last_row <- author_Id * tweets_per_author

  if (first_row > nrow(features_Data)) {
    stop("No rows available for author_Id ", author_Id, call. = FALSE)
  }

  author_features_Data <- features_Data %>%
    slice(first_row:last_row)

  author_emotions_df <- author_features_Data %>%
    select(anger, anticipation, disgust, fear, joy, sadness, surprise, trust)

  author_emotions_df_column_count <- colSums(author_emotions_df)
  author_emotions_df_column_count_df <- data.frame(
    count = author_emotions_df_column_count,
    Different_Emotions = names(author_emotions_df_column_count)
  )

  author_emotions_df_column_count_df <- author_emotions_df_column_count_df %>%
    mutate(percent = count / sum(count) * 100)

  # ggplot() + geom_col() replaces the deprecated qplot() with the same bars.
  ggplot(author_emotions_df_column_count_df,
         aes(x = Different_Emotions, y = percent, fill = Different_Emotions)) +
    geom_col() +
    labs(x = "Emotions", y = "Percentage") +
    ggtitle("Author Emotion Analysis")
}

getAuthorBasedTweets(authorId)

#End of Tweet Analysis Based On Author ID